## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.6
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
To use synapsis, you will need the following packages:
And to run this tutorial, you will need:
devtools::install_git('https://gitlab.svi.edu.au/lmcneill/synapsis')
library(synapsis)
library(EBImage)
##
## Attaching package: 'EBImage'
## The following object is masked from 'package:purrr':
##
## transpose
For the moment, we will use the example images which come with synapsis.
path = paste0(system.file("extdata",package = "synapsis"))
The example images are from the same slide. One of them “-RGB” is a false colour three channel image. The other three are the single channels corresponding to specific antibodies or stains. For now, we will take a look at the sample image
file_3channel <- paste0(path,"/MLH3rabbit488_SYCP3mouse594_fancm_fvb_x_fancm_bl6_724++_slide01_006-RGB.jpeg")
image_3channel <- readImage(file_3channel)
display(image_3channel)
After a look at the image, we note that SYCP3 and MLH3 antibodies illuminate synaptonemal complexes (red) and foci (green) respectively. DAPI stains cells (blue). In functions, channel1_string refers to the antibody/ identifier for the foci channel (green here). It defaults to *-MLH3. channel2_string is the antibody/ identifier for the synaptonemal complex / dna channel (red here), which defaults to *-SYCP3. channel3_string is optional antibody/ identifier for a third channel. It defaults to *DAPI. We can split these up
r = channel(image_3channel,"r")
g = channel(image_3channel,"g")
b = channel(image_3channel,"b")
Let’s confirm that this is consistent with our filenames.
display(g)
Now compare to the file ending in MLH3
file_MLH3 <- paste0(path,"/MLH3rabbit488_SYCP3mouse594_fancm_fvb_x_fancm_bl6_724++_slide01_006-MLH3.jpeg")
image_MLH3 <- readImage(file_MLH3)
display(image_MLH3)
They should be the same.
display(r)
Now compare to the file ending in SYCP3
file_SYCP3 <- paste0(path,"/MLH3rabbit488_SYCP3mouse594_fancm_fvb_x_fancm_bl6_724++_slide01_006-SYCP3.jpeg")
image_SYCP3 <- readImage(file_SYCP3)
display(image_SYCP3)
That is a short introduction to the file naming convention in synapsis. We will go into more detail at the end of the tutorial, particularly what this means for your dataset.
And DAPI channel
display(b)
Now compare to the file ending in SYCP3
file_DAPI <- paste0(path,"/MLH3rabbit488_SYCP3mouse594_fancm_fvb_x_fancm_bl6_724++_slide01_006-DAPI.jpeg")
image_DAPI <- readImage(file_DAPI)
display(image_DAPI)
You can type
??auto_crop_fast
There is an annotation setting, which we switch to “on”. max_cell_area and min_cell_area have been calibrated to our data set, where the subject are mouse cells and magnification kept constant. You could run it on the first five images by setting e.g. test_amount = 5. But for now we will just look at the single image.
auto_crop_fast(path, annotation = "on", max_cell_area = 30000, min_cell_area = 7000, test_amount = 1)
## [1] "couldn't crop it since cell is on the edge. Neglected the following mask of a cell candidate:"
## [1] "from the file:"
## [1] "MLH3rabbit488_SYCP3mouse594_fancm_fvb_x_fancm_bl6_724++_slide01_006"
## [1] "I cropped this cell:"
## [1] "using this mask"
## [1] "whose cell number is"
## [1] 2
## [1] "out of"
## [1] 0
## [1] "images, we got"
## [1] 2
## [1] "viable cells"
Here we called path, plus other optional parameters (that would otherwise take on default values). But only path is essential. This is because auto_crop_fast has built-in default values which are assumed when the user doesn’t specify.
A crops folder with three channels per “viable cell” should have been generated inside the folder where these images are kept i.e. in path.
SYCP3_stats <- get_pachytene(path,ecc_thresh = 0.8, area_thresh = 0.04, annotation = "on")
## [1] "decided the following is pachytene"
## [1] "number of cells kept"
## [1] 1
SYCP3_stats is a data frame summarizing some features of the cells classified as pachytene. ## Counting foci
foci_counts <- count_foci(path,offset_factor = 3, brush_size = 3, brush_sigma = 3, annotation = "on",stage = "pachytene")
## [1] "cell counter is"
## [1] 1
## [1] "original images"
## [1] "displaying resulting foci count"
## [1] "Overlay two channels"
## [1] "coincident foci"
## [1] "two channels, only coincident foci"
## [1] "which counts this many foci:"
## [1] 29
## [1] "number of alone foci"
## [1] 51
## [1] "percentage of foci channel coincident:"
## [1] 49.84026
foci_counts is a data frame summarizing some features (including foci counts) of the cells classified as pachytene.
df_dist <- measure_distances(path, annotation = "on")
## [1] "looking at cell number:"
## [1] 1
## [1] "foci located at"
## [1] 76.92857 50.85714 70.06667 72.73333
## [1] "total distance is"
## [1] 47.97056
## [1] "with foci locations included (actual)"
## [1] "with foci locations included, on the line"
## [1] "here are the results for this SC"
## [1] "the distance strands measure"
## [1] 16.82843 31.14214
## [1] "distance between foci (in pixels) was"
## [1] 24.89949
## [1] "distance as a fraction of total length is"
## [1] 0.5190578
## [1] "This strand managed to pass through."
## [1] "Check that it walked successfully. Total length in red, distance between two in white, and foci locations in magenta."
Now we will run auto_crop on the bigger data set (silently/ without annotation) to do some significance testing.
start_time <- as.numeric(as.numeric(Sys.time())*1000, digits=15) # place at start
auto_crop_fast(path, annotation = "off", max_cell_area = 30000, min_cell_area = 7000,third_channel = "on")
end_time <- as.numeric(as.numeric(Sys.time())*1000, digits=15)
print(end_time - start_time)
Now let’s count the foci for each genotype.
SYCP3_stats <- get_pachytene(path,ecc_thresh = 0.8, area_thresh = 0.04, annotation = "off")
foci_counts <- count_foci(path,offset_factor = 3, brush_size = 3, brush_sigma = 3, annotation = "off",stage = "pachytene")
Instead of evaluating, we will load the resulting dataframe from executing the above chunk on a large dataset. In this example data set (images taken by Vanessa Tsui in 2020 at SVI), there are 174 images where auto_crop_fast yields 243 crops. This took 1.2 minutes.
demo_path <-paste0(system.file("extdata",package = "synapsis"),"/")
foci_count_path <- paste0(demo_path, "df_foci_count.csv")
foci_counts <- read.csv(foci_count_path)
### comparing groups
counts <- foci_counts$foci_count
counts_mod <- foci_counts[as.numeric(foci_counts$foci_count) > 0,]
counts_mod <- foci_counts[as.numeric(foci_counts$foci_count) < 40,]
#counts_mod <- counts_mod[as.numeric(counts_mod$percent_on) > 0.55,]
# counts_mod <- counts_mod[as.numeric(counts_mod$sd_foci) <20,]
counts <- counts_mod$foci_count
counts_KO <- counts_mod[counts_mod$genotype == "Fancm-/-",]
counts_WT <- counts_mod[counts_mod$genotype == "Fancm+/+",]
count_KO <- counts_KO$foci_count
count_WT <- counts_WT$foci_count
mean(as.numeric(count_KO), na.rm= TRUE)
## [1] 29.80851
mean(as.numeric(count_WT), na.rm= TRUE)
## [1] 20.84848
sd(as.numeric(count_KO), na.rm= TRUE)
## [1] 5.651619
sd(as.numeric(count_WT), na.rm= TRUE)
## [1] 10.54787
c1 <- rgb(173,216,230,max = 255, alpha = 140, names = "lt.blue")
c4 <- rgb(255,180,50, max = 255, alpha = 120, names = "lt.orange")
A <- hist(as.numeric(count_WT),plot = FALSE)
B <- hist(as.numeric(count_KO), plot = FALSE )
plot(A,ylim = c(0,20), main = "Pachytene", col = c4, xlab = "foci count per cell")
plot(B, col = c1, add = TRUE)
## anova test
counts_mod$group <- factor(counts_mod$genotype, c("Fancm-/-", "Fancm+/+"))
outfit <- lm(foci_count ~ genotype, data=counts_mod)
outfit
##
## Call:
## lm(formula = foci_count ~ genotype, data = counts_mod)
##
## Coefficients:
## (Intercept) genotypeFancm+/+
## 29.81 -8.96
#df.residual(outfit)
#sigma(outfit)
#model.matrix(outfit)
outfit0 <- lm(foci_count ~ 1, data=counts_mod)
anova(outfit0, outfit)
## Analysis of Variance Table
##
## Model 1: foci_count ~ 1
## Model 2: foci_count ~ genotype
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 79 6586.0
## 2 78 5029.5 1 1556.5 24.138 4.841e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
t.test(as.numeric(count_KO),as.numeric(count_WT))
##
## Welch Two Sample t-test
##
## data: as.numeric(count_KO) and as.numeric(count_WT)
## t = 4.4517, df = 44.931, p-value = 5.577e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 4.906032 13.014020
## sample estimates:
## mean of x mean of y
## 29.80851 20.84848
now that we have a p value we can paste this on the histogram
c1 <- rgb(173,216,230,max = 255, alpha = 140, names = "lt.blue")
c4 <- rgb(255,180,50, max = 255, alpha = 120, names = "lt.orange")
A <- hist(as.numeric(count_WT),plot = FALSE)
B <- hist(as.numeric(count_KO), plot = FALSE )
plot(A,ylim = c(0,20), main = "Pachytene", col = c4, xlab = "foci count per cell")
text(x = 10, y = 15, label = "anova p value = 0.01*", col = "black", cex = 1)
plot(B, col = c1, add = TRUE)
library(tidyverse)
counts_mod %>%
ggplot(aes(x=genotype, y=as.numeric(foci_count), fill = genotype)) +
geom_boxplot(width=0.5,lwd=1.5) +
geom_jitter(width=0.15) +
labs(subtitle="MLH3 foci counts")
We can also get some stats on the distances measured.
distances_path <- paste0(demo_path, "df_distances.csv")
df_dist <- read.csv(distances_path)
pass_only <- df_dist[df_dist$pass_fail == "pass",]
distances <- pass_only$fractional_distance
distances_KO <- pass_only[pass_only$genotype == "Fancm-/-",]
distances_WT <- pass_only[pass_only$genotype == "Fancm+/+",]
distance_KO <- distances_KO$fractional_distance
distance_WT <- distances_WT$fractional_distance
mean(as.numeric(distance_KO), na.rm= TRUE)
## [1] 0.4400367
mean(as.numeric(distance_WT), na.rm= TRUE)
## [1] 0.4232971
sd(as.numeric(distance_KO), na.rm= TRUE)
## [1] 0.160769
sd(as.numeric(distance_WT), na.rm= TRUE)
## [1] 0.1778392
c1 <- rgb(173,216,230,max = 255, alpha = 140, names = "lt.blue")
c4 <- rgb(255,180,50, max = 255, alpha = 120, names = "lt.orange")
A <- hist(as.numeric(distance_WT),plot = FALSE)
B <- hist(as.numeric(distance_KO), plot = FALSE )
plot(A,ylim = c(0,9),xlim = c(0,1), main = "Pachytene", col = c4, xlab = "foci distance as fraction of total length")
#text(x = 0.2, y = 7, label = "anova p value 0.9 (NS)", col = "black", cex = 1)
plot(B, col = c1, add = TRUE)
t.test(as.numeric(distance_KO),as.numeric(distance_WT))
##
## Welch Two Sample t-test
##
## data: as.numeric(distance_KO) and as.numeric(distance_WT)
## t = 0.36915, df = 46.189, p-value = 0.7137
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.07452709 0.10800626
## sample estimates:
## mean of x mean of y
## 0.4400367 0.4232971